In [ ]:
# Dr. M. Baron, Statistical Machine Learning class, STAT-427/627
# Jackknife and Bootstrap for Traffic Problems
# Import necessary libraries
!pip install numpy scikit-learn statsmodels scipy
import numpy as np
from sklearn.utils import resample
import statsmodels.api as sm
In [ ]:
# Generate data
X = np.concatenate([np.repeat(0, 26), np.repeat(1, 10), np.repeat(2, 4)])
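As a quick sanity check (added here, not part of the original analysis), the counts can be tabulated to confirm the sample size and sample mean that drive the estimators below: 26 intervals with 0 accidents, 10 with 1, and 4 with 2, so n = 40 and the sample mean is 0.45.
In [ ]:
# Added sketch: tabulate the accident counts and verify the sample mean
values, counts = np.unique(X, return_counts=True)
print(dict(zip(values.tolist(), counts.tolist())))   # expected: {0: 26, 1: 10, 2: 4}
print(f"n = {len(X)}, sample mean = {np.mean(X)}")   # n = 40, mean = 0.45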
In [4]:
# Define Poisson estimator function
def P_poisson(X):
    return np.exp(-np.mean(X))
# Poisson estimate
P_poisson_estimate = P_poisson(X)
print(f"Poisson estimate: {P_poisson_estimate}")
Poisson estimate: 0.6376281516217733
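The Poisson estimator above is the plug-in (maximum likelihood) estimate of the probability of an accident-free interval: if the counts follow a Poisson(λ) distribution, then
$$
P(X = 0) = e^{-\lambda}, \qquad \hat{\lambda} = \bar{X} = 0.45, \qquad \hat{P}_{\text{Poisson}} = e^{-\bar{X}} \approx 0.6376.
$$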
In [7]:
# Jackknife Bias Correction
# Jackknife function to reduce bias
def jackknife(X, estimator_func):
    n = len(X)
    leave_one_out_estimates = np.zeros(n)
    for i in range(n):
        X_leave_one_out = np.delete(X, i)
        leave_one_out_estimates[i] = estimator_func(X_leave_one_out)
    # Jackknife bias estimate and bias-corrected estimate
    bias = (n - 1) * (np.mean(leave_one_out_estimates) - estimator_func(X))
    jackknife_estimate = estimator_func(X) - bias
    return {
        'jackknife_estimate': jackknife_estimate,
        'bias': bias
    }
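For reference, the quantities computed inside jackknife() are the standard jackknife bias correction, where $\hat{\theta}$ is the full-sample estimate and $\hat{\theta}_{(i)}$ the estimate with observation $i$ removed:
$$
\bar{\theta}_{(\cdot)} = \frac{1}{n}\sum_{i=1}^{n} \hat{\theta}_{(i)}, \qquad
\widehat{\mathrm{bias}}_{\mathrm{jack}} = (n-1)\left(\bar{\theta}_{(\cdot)} - \hat{\theta}\right), \qquad
\hat{\theta}_{\mathrm{jack}} = \hat{\theta} - \widehat{\mathrm{bias}}_{\mathrm{jack}} = n\hat{\theta} - (n-1)\bar{\theta}_{(\cdot)}.
$$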
In [8]:
# Jackknife correction for Poisson estimator
JK_poisson = jackknife(X, P_poisson)
print(f"Jackknife Poisson estimate: {JK_poisson['jackknife_estimate']}, Bias: {JK_poisson['bias']}")
# Three Estimators
# First estimator (sample proportion)
# Second estimator (Poisson)
# Third estimator (Poisson with Jackknife correction)
# Define sample proportion estimator (Statistician A)
def P_hat(X, sample=None):
    if sample is None:
        sample = np.arange(len(X))
    return np.mean(X[sample] == 0)
# Define Poisson estimator for bootstrap
def P_poisson_boot(X, sample=None):
    if sample is None:
        sample = np.arange(len(X))
    return np.exp(-np.mean(X[sample]))
# Jackknife Poisson estimator function for bootstrap
def P_JK(X, sample=None):
    if sample is None:
        sample = np.arange(len(X))
    n = len(sample)
    P_i = np.zeros(n)
    for i in range(n):
        P_i[i] = P_poisson_boot(X, np.delete(sample, i))
    # Equivalent form of the jackknife correction: n*theta_hat - (n-1)*mean of leave-one-out estimates
    return n * P_poisson_boot(X, sample) - (n - 1) * np.mean(P_i)
Jackknife Poisson estimate: 0.6339448955289935, Bias: 0.003683256092779863
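Because $n\hat{\theta} - (n-1)\bar{\theta}_{(\cdot)}$ is algebraically the same correction used in jackknife(), P_JK evaluated on the full sample should reproduce the jackknife-corrected estimate above. A quick check (added sketch):
In [ ]:
# Added sketch: P_JK on the full sample should match the jackknife() result
print(P_JK(X), JK_poisson['jackknife_estimate'])
assert np.isclose(P_JK(X), JK_poisson['jackknife_estimate'])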
In [9]:
# Apply the estimators on the original data
P_hat_estimate = P_hat(X)
P_poisson_estimate = P_poisson(X)
P_JK_estimate = JK_poisson['jackknife_estimate']
print(f"Sample Proportion estimate: {P_hat_estimate}")
print(f"Poisson estimate: {P_poisson_estimate}")
print(f"Jackknife-corrected Poisson estimate: {P_JK_estimate}")
Sample Proportion estimate: 0.65
Poisson estimate: 0.6376281516217733
Jackknife-corrected Poisson estimate: 0.6339448955289935
In [10]:
# Bootstrap Analysis
from scipy.stats import bootstrap
# Bootstrap function
def bootstrap_estimate(X, func, R=10000):
    estimates = np.zeros(R)
    n = len(X)
    for i in range(R):
        # Resample indices with replacement and recompute the estimator
        sample_indices = np.random.choice(np.arange(n), size=n, replace=True)
        estimates[i] = func(X, sample_indices)
    bias = np.mean(estimates) - func(X)
    std_error = np.std(estimates)
    return {
        'original': func(X),
        'bias': bias,
        'std_error': std_error
    }
# Apply bootstrap for the three estimators
bootstrap_hat = bootstrap_estimate(X, P_hat)
bootstrap_poisson = bootstrap_estimate(X, P_poisson_boot)
bootstrap_JK = bootstrap_estimate(X, P_JK)
print(f"Bootstrap sample proportion: {bootstrap_hat}")
print(f"Bootstrap Poisson: {bootstrap_poisson}")
print(f"Bootstrap Jackknife Poisson: {bootstrap_JK}")
Bootstrap sample proportion: {'original': 0.65, 'bias': -0.0007175000000000376, 'std_error': 0.07590815301237411}
Bootstrap Poisson: {'original': 0.6376281516217733, 'bias': 0.004855433161392031, 'std_error': 0.06731707653859585}
Bootstrap Jackknife Poisson: {'original': 0.633944895528991, 'bias': 0.0027164155417765956, 'std_error': 0.06704901657996373}
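Beyond bias and standard error, the same resampling scheme yields percentile confidence intervals. The sketch below is an addition (the helper name and the 2.5/97.5 percentile choice are assumptions, not part of the original notebook) and reports a 95% percentile interval for the Poisson estimator:
In [ ]:
# Added sketch: 95% percentile bootstrap interval
def bootstrap_percentile_ci(X, func, R=10000, alpha=0.05):
    n = len(X)
    reps = np.zeros(R)
    for i in range(R):
        idx = np.random.choice(np.arange(n), size=n, replace=True)
        reps[i] = func(X, idx)
    # Percentile interval: empirical alpha/2 and 1 - alpha/2 quantiles of the replicates
    return np.percentile(reps, [100 * alpha / 2, 100 * (1 - alpha / 2)])

print("95% percentile CI for P_poisson:", bootstrap_percentile_ci(X, P_poisson_boot))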
In [11]:
# Results Comparison
# Summary
print("\nBias Comparison:")
print(f"P_hat bias: {bootstrap_hat['bias']}")
print(f"P_poisson bias: {bootstrap_poisson['bias']}")
print(f"P_JK bias: {bootstrap_JK['bias']}")
print("\nStandard Error Comparison:")
print(f"P_hat std. error: {bootstrap_hat['std_error']}")
print(f"P_poisson std. error: {bootstrap_poisson['std_error']}")
print(f"P_JK std. error: {bootstrap_JK['std_error']}")
Bias Comparison:
P_hat bias: -0.0007175000000000376
P_poisson bias: 0.004855433161392031
P_JK bias: 0.0027164155417765956

Standard Error Comparison:
P_hat std. error: 0.07590815301237411
P_poisson std. error: 0.06731707653859585
P_JK std. error: 0.06704901657996373
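To fold the two criteria into a single number, the bootstrap bias and standard error can be combined into an estimated mean squared error, MSE ≈ bias² + SE². The loop below is an added sketch, not part of the original notebook:
In [ ]:
# Added sketch: estimated MSE = bias^2 + SE^2 from the bootstrap results above
for name, b in [("P_hat", bootstrap_hat), ("P_poisson", bootstrap_poisson), ("P_JK", bootstrap_JK)]:
    mse = b['bias'] ** 2 + b['std_error'] ** 2
    print(f"{name}: estimated MSE = {mse:.6f}")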